Purpose: Demonstrate model creation with the AWS boto3 SDK (Amazon Machine Learning service), using the Iris multi-class dataset
Steps:
In [1]:
    
import boto3
import os
    
In [2]:
    
# Local data path holding the training, evaluation and batch test files;
# batch prediction results will later be downloaded back to this folder
data_path = r'..\..\Data\ModelCreationDemo'
# Training and test files
training_file = 'iris_data_train.csv'
training_schema_file = 'iris_data_train.csv.schema'
batch_test_file = 'iris_data_classifier_test.csv'
    
In [3]:
    
# S3 bucket name - bucket names must be globally unique across AWS (not just within your account)
# Example only - make sure you modify it to point to your own S3 bucket
s3_bucket_name = 'ml-course'
s3_folder_name = 'boto3_demo'
# Files will be uploaded to s3://s3_bucket_name/s3_folder_name/
    
In [4]:
    
# Initialize a session with the appropriate user profile.
# Optionally use a named profile (here profile_name='ml_user') instead of the default profile.
# Specify the region where you want to create your ML models and files:
# http://docs.aws.amazon.com/machine-learning/latest/dg/regions-and-endpoints.html
# Amazon ML is currently available in US East (N. Virginia) us-east-1 and EU (Ireland) eu-west-1
session   = boto3.Session(region_name = 'us-east-1',
                          profile_name = 'ml_user')
ml_client = session.client('machinelearning')
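
The upload step below assumes the S3 bucket already exists. If it does not, a minimal sketch like the following can create it from the same session (assumes us-east-1, where no LocationConstraint is required; for any other region pass CreateBucketConfiguration={'LocationConstraint': '<region>'}):

# Sketch, not part of the original flow: create the bucket only if it is missing
s3 = session.client('s3')
existing_buckets = [b['Name'] for b in s3.list_buckets()['Buckets']]
if s3_bucket_name not in existing_buckets:
    s3.create_bucket(Bucket=s3_bucket_name)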
    
In [5]:
    
def upload_files_to_s3():
    # Use the S3 resource from the same session so that region and profile match
    s3_resource = session.resource('s3')
    file_names = [training_file, training_schema_file, batch_test_file]
    for file_name in file_names:
        file_path = os.path.join(data_path, file_name)
        print(file_path)
        # Upload each file to s3://<bucket>/<folder>/<file>
        s3_resource.Bucket(s3_bucket_name).upload_file(file_path, s3_folder_name + '/' + file_name)
    
In [6]:
    
upload_files_to_s3()
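
As a quick sanity check (not part of the original flow), the uploaded objects can be listed under the folder prefix:

# List everything under s3://<bucket>/<folder>/ to confirm the three uploads
s3_resource = session.resource('s3')
for obj in s3_resource.Bucket(s3_bucket_name).objects.filter(Prefix=s3_folder_name + '/'):
    print(obj.key, obj.size)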
    
    
In [7]:
    
# Derived from
# https://github.com/awslabs/machine-learning-samples/blob/master/social-media/create-aml-model.py
def create_data_source(dataset_name,
                       s3_data_uri,
                       s3_schema_uri,
                       ds_type,
                       percent_begin,
                       percent_end,
                       compute_statistics):
    ds_id = "ds-boto3-iris-{0}".format(ds_type)
    data_spec = {}
    data_spec['DataLocationS3'] = s3_data_uri
    data_spec['DataSchemaLocationS3'] = s3_schema_uri
    data_spec['DataRearrangement'] = \
        '{{"splitting":{{"percentBegin":{0},"percentEnd":{1},"strategy":"sequential"}}}}'.format(
        percent_begin, percent_end)
    
    response = ml_client.create_data_source_from_s3(
        DataSourceId = ds_id,
        DataSourceName = "{0}_[percentBegin={1}, percentEnd={2}]".format(dataset_name, percent_begin, percent_end),
        DataSpec = data_spec,    
        ComputeStatistics = compute_statistics)
    
    print("Creating {0} datasource".format(ds_type))
    return response
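
For reference, the DataRearrangement string built above for the 0-70 training split expands to the JSON below; the 'sequential' strategy splits the file by row order, so non-overlapping percent ranges yield disjoint subsets:

{"splitting":{"percentBegin":0,"percentEnd":70,"strategy":"sequential"}}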
    
In [8]:
    
# Create Training Data Source
s3_train_uri = "s3://{0}/{1}/{2}".format(s3_bucket_name, s3_folder_name, training_file)
s3_train_schema_uri = "s3://{0}/{1}/{2}".format(s3_bucket_name, s3_folder_name, training_schema_file)
    
In [9]:
    
s3_train_uri, s3_train_schema_uri
    
    Out[9]:
In [10]:
    
train_datasource = create_data_source(
    'iris_training', 
    s3_train_uri,
    s3_train_schema_uri,
    'Training',0, 70, True)
    
    
In [11]:
    
# The evaluation data source points at the same S3 file but takes the remaining 30%
# (sequential split), so training and evaluation rows do not overlap
eval_datasource = create_data_source(
    'iris_evaluation', 
    s3_train_uri,
    s3_train_schema_uri,
    'Evaluation', 70, 100, False)
    
    
In [12]:
    
print(train_datasource['DataSourceId'])
print(eval_datasource['DataSourceId'])
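
create_data_source_from_s3 is asynchronous, so both data sources start out PENDING. A minimal polling sketch (wait_for_datasource is a hypothetical helper, not part of the original notebook) to block until they finish:

import time

def wait_for_datasource(ds_id, delay=30):
    # Hypothetical helper: poll until the data source leaves PENDING/INPROGRESS
    while True:
        status = ml_client.get_data_source(DataSourceId=ds_id)['Status']
        print(ds_id, status)
        if status in ('COMPLETED', 'FAILED', 'DELETED'):
            return status
        time.sleep(delay)

wait_for_datasource(train_datasource['DataSourceId'])
wait_for_datasource(eval_datasource['DataSourceId'])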
    
    
In [13]:
    
# Create the ML model; if no Recipe/RecipeUri is supplied, Amazon ML uses its default recipe
model_create_response = ml_client.create_ml_model(
    MLModelId = 'ml-iris-demo',
    MLModelName = 'ML model: iris-demo-from-code',
    MLModelType = 'MULTICLASS',
    TrainingDataSourceId = train_datasource['DataSourceId'])
    
In [14]:
    
model_create_response
    
    Out[14]:
In [15]:
    
# Query 'Status': 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED', 
ml_client.get_ml_model(MLModelId = model_create_response['MLModelId'])['Status']
    
    Out[15]:
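
Training is asynchronous as well, so the status above typically starts at PENDING. The same polling pattern can be used to wait until the model is ready for evaluation (sketch with a hypothetical wait_for_model helper):

import time

def wait_for_model(model_id, delay=30):
    # Hypothetical helper: poll until the model leaves PENDING/INPROGRESS
    while True:
        status = ml_client.get_ml_model(MLModelId=model_id)['Status']
        print(model_id, status)
        if status in ('COMPLETED', 'FAILED', 'DELETED'):
            return status
        time.sleep(delay)

wait_for_model(model_create_response['MLModelId'])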
In [16]:
    
evaluation_response = ml_client.create_evaluation(
    EvaluationId = 'eval-iris-demo',
    EvaluationName = 'Eval ML model: iris-demo-from-code',
    MLModelId = model_create_response['MLModelId'],    
    EvaluationDataSourceId = eval_datasource['DataSourceId'])
    
In [17]:
    
evaluation_response
    
    Out[17]:
In [18]:
    
#Query 'Status': 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED'
eval_result = ml_client.get_evaluation(EvaluationId = evaluation_response['EvaluationId'])
    
In [19]:
    
eval_result['Status']
    
    Out[19]:
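
PerformanceMetrics is only populated once the evaluation reaches COMPLETED, so if the status above is still PENDING or INPROGRESS the result needs to be re-fetched after waiting (sketch, same polling idea as before):

import time

while eval_result['Status'] not in ('COMPLETED', 'FAILED', 'DELETED'):
    time.sleep(30)
    eval_result = ml_client.get_evaluation(EvaluationId = evaluation_response['EvaluationId'])
print(eval_result['Status'])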
In [20]:
    
eval_result['PerformanceMetrics']
    
    Out[20]:
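
For a MULTICLASS model the metrics come back as strings under Properties; a small sketch to pull out the macro-averaged F1 score (assuming the usual MulticlassAvgFScore key):

# Assumes PerformanceMetrics['Properties'] contains 'MulticlassAvgFScore' for multiclass models
f1_score = float(eval_result['PerformanceMetrics']['Properties']['MulticlassAvgFScore'])
print('Average F1 score: {0:.4f}'.format(f1_score))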
In [21]:
    
eval_result
    
    Out[21]:
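
The batch test file uploaded earlier (iris_data_classifier_test.csv) is not used above; the usual next step would be a batch prediction against the trained model and downloading the results back to data_path. The following is only a hedged sketch: the IDs, names and output prefix are illustrative assumptions, and it reuses the training schema for the test file:

# Sketch only - IDs, names and output prefix below are illustrative assumptions
s3_batch_uri = "s3://{0}/{1}/{2}".format(s3_bucket_name, s3_folder_name, batch_test_file)
batch_datasource = ml_client.create_data_source_from_s3(
    DataSourceId = 'ds-boto3-iris-batch',
    DataSourceName = 'iris_batch_test',
    DataSpec = {'DataLocationS3': s3_batch_uri,
                'DataSchemaLocationS3': s3_train_schema_uri},
    ComputeStatistics = False)

batch_response = ml_client.create_batch_prediction(
    BatchPredictionId = 'bp-iris-demo',
    BatchPredictionName = 'Batch prediction: iris-demo-from-code',
    MLModelId = model_create_response['MLModelId'],
    BatchPredictionDataSourceId = batch_datasource['DataSourceId'],
    OutputUri = "s3://{0}/{1}/batch_output/".format(s3_bucket_name, s3_folder_name))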